L05 Annotation & Positioning

Data Visualization (STAT 302)

Author

DAVIS JOHNSON

Load Package(s) and Datasets

We’ll be using the blue_jays.rda, titanic.rda, Aus_athletes.rda, and tech_stocks.rda datasets.

Code
# Load package(s)

library(tidyverse)
library(ggrepel)
library(cowplot)
library(patchwork)

# Load data: each .rda file restores its stored object(s) into the session

rda_paths <- c(
  "data/blue_jays.rda",
  "data/titanic.rda",
  "data/Aus_athletes.rda",
  "data/tech_stocks.rda"
)

for (rda_path in rda_paths) {
  load(rda_path)
}

Exercise 1

Using the blue_jays.rda dataset, recreate the following graphic as precisely as possible.

Hints:

  • Transparency is 0.8
  • Point size 2
  • Create a label_info dataset that is a subset of original data, just with the 2 birds to be labeled
  • Shift label text horizontally by 0.5
  • See ggplot2 textbook 8.3 building custom annotations
  • Annotation size is 4
  • Classic theme
Solution
Code
# Candidate rows for the point labels: the two largest head lengths within
# each sex. Two rows per sex (not one) are kept so that the female point
# picked out later by row position is available.
label_info <- blue_jays %>%
  # sort the whole table from largest to smallest head length; the filter
  # below keeps this row order
  arrange(desc(Head)) %>%
  group_by(KnownSex) %>%
  # keep the rows ranked 1st or 2nd by Head inside each sex (ties are kept);
  # this is the ranking rule of the superseded top_n(n = 2, wt = Head)
  filter(min_rank(desc(Head)) <= 2) %>%
  # drop the grouping so downstream code receives a plain, ungrouped tibble
  ungroup()
Code
# Caption text, including the number of birds in the data
caption <- paste("Head length versus body mass for", nrow(blue_jays), "blue jays")

# Anchor for the caption: the top-left corner of the observed data.
# annotate("text") needs explicit x/y positions.
mass_range <- range(blue_jays$Mass, na.rm = TRUE)
head_range <- range(blue_jays$Head, na.rm = TRUE)

# Rows that receive the M and F labels, picked by position in label_info.
# NOTE(review): rows 1 and 4 depend on label_info's row order -- confirm the
# selection if the data or the ordering ever changes.
Labels <- label_info[c(1, 4), ]

# Scatterplot of head length against body mass, coloured by sex
ggplot(data = blue_jays, aes(x = Mass, y = Head, color = KnownSex)) +
  geom_point(alpha = 0.8, size = 2) +
  # caption pinned to the top-left of the data (left- and top-justified)
  annotate(
    "text",
    x = mass_range[1], y = head_range[2],
    label = caption,
    hjust = 0, vjust = 1, size = 4
  ) +
  labs(x = "Body mass (g)", y = "Head length (mm)") +
  # sex label next to each selected bird, shifted right by 0.5
  geom_text(data = Labels, aes(label = KnownSex), nudge_x = 0.5) +
  # the direct labels replace the colour legend
  guides(color = "none") +
  theme_classic()

Exercise 2

Using the tech_stocks dataset, recreate the following graphics as precisely as possible. Use the column price_indexed.

Plot 1

Hints:

  • Create a label_info dataset that is a subset of original data, just containing the last day’s information for each of the 4 stocks
  • serif font
  • Annotation size is 4
Solution
Code
# One row per company: the row carrying that company's most recent date
label_info <- tech_stocks %>%
  ungroup() %>%
  # newest dates first ...
  arrange(desc(date)) %>%
  # ... so the first row encountered for a company is its latest one
  filter(!duplicated(company))
Code
# Caption drawn inside the plot panel
caption <- "Stock price over time for four major tech companies"

# Remove any grouping carried by the loaded data
tech_stocks <- ungroup(tech_stocks)

# Extremes of both plotted variables; used to pin the caption to a corner
xrange <- range(tech_stocks$date)
yrange <- range(tech_stocks$price_indexed)

# Line plot of the indexed price, one line per company
ggplot(tech_stocks, aes(date, price_indexed)) +
  geom_line(aes(color = company)) +
  # caption at the top-left corner of the data, left- and top-justified
  annotate(
    geom = "text",
    x = xrange[1], y = yrange[2],
    label = caption,
    hjust = 0, vjust = 1,
    family = "serif", size = 4
  ) +
  # company name at each line's most recent point
  geom_text(aes(label = company), data = label_info) +
  # no x-axis title
  labs(x = NULL, y = "Stock price, indexed") +
  # direct labels replace the colour legend
  guides(color = "none") +
  theme_minimal()

Plot 2

Hints:

  • Package ggrepel
    • box.padding is 0.6
    • Minimum segment length is 0
    • Horizontal justification is to the right
    • seed of 9876
  • Annotation size is 4
  • serif font
Solution
Code
# Same line plot as Plot 1, but the company labels are placed with ggrepel
ggplot(tech_stocks, aes(date, price_indexed)) +
  geom_line(aes(color = company)) +
  # caption at the top-left corner of the data, left- and top-justified
  annotate(
    geom = "text",
    x = xrange[1], y = yrange[2],
    label = caption,
    hjust = 0, vjust = 1,
    family = "serif", size = 4
  ) +
  # repelled company labels; the fixed seed keeps the layout reproducible
  geom_text_repel(
    aes(label = company),
    data = label_info,
    box.padding = 0.6,
    min.segment.length = 0,
    hjust = 1,
    seed = 9876
  ) +
  # no x-axis title
  labs(x = NULL, y = "Stock price, indexed") +
  guides(color = "none") +
  theme_minimal()

Exercise 3

Using the titanic.rda dataset, recreate the following graphic as precisely as possible.

Hints:

  • Create a new variable that uses died and survived as levels/categories
  • Hex colors: #D55E00D0, #0072B2D0 (no alpha is being used)
Solution
Code
# Bar chart of passenger counts by sex, faceted by outcome (rows) and
# class (columns)
titanic %>%
  # outcome variable with died / survived as its categories
  mutate(outcome = factor(survived, labels = c("died", "survived"))) %>%
  ggplot(aes(sex, fill = sex)) +
  geom_bar() +
  facet_grid(rows = vars(outcome), cols = vars(class)) +
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  theme_minimal() +
  # the x axis already names the sexes, so the fill legend is hidden
  theme(legend.position = "none")

Exercise 4

Use the athletes_dat dataset — extracted from Aus_athletes.rda — to recreate the following graphic as precisely as possible. Create the graphic twice: once using patchwork and once using cowplot.

Code
# Number of distinct sexes observed for each sport
sexes_per_sport <- Aus_athletes %>%
  # one row per (sex, sport) combination
  distinct(sex, sport) %>%
  # n = how many sexes appear for the sport
  count(sport)

# Sports played by BOTH sexes
both_sports <- sexes_per_sport$sport[sexes_per_sport$n == 2]

# Process data
athletes_dat <- Aus_athletes %>%
  # keep only the sports played by both sexes
  filter(sport %in% both_sports) %>%
  # fold the two track events into a single "track" category;
  # every other sport keeps its name
  mutate(
    sport = case_when(
      sport %in% c("track (400m)", "track (sprint)") ~ "track",
      TRUE ~ sport
    )
  )

Hints:

  • Build each plot separately
  • Bar plot: lower limit 0, upper limit 95
  • Bar plot: shift bar labels by 5 units and top justify
  • Bar plot: label size is 5
  • Bar plot: #D55E00D0 & #0072B2D0 — no alpha
  • Scatterplot: #D55E00D0 & #0072B2D0 — no alpha
  • Scatterplot: filled circle with “white” outline; size is 3
  • Scatterplot: rcc is red blood cell count; wcc is white blood cell count
  • Boxplot: outline #D55E00 and #0072B2; shading #D55E0040 and #0072B240
  • Boxplot: should be made narrower; 0.5
  • Boxplot: Legend is in top-right corner of bottom plot
  • Boxplot: Space out labels c("female ", "male")
  • Boxplot: Legend shading matches hex values for top two plots
Solution
Code
# Bar plot: number of athletes per sex, with the count printed inside each bar
barplot <- ggplot(data = athletes_dat, aes(x = sex, fill = sex)) +
  geom_bar(show.legend = FALSE) +
  # count labels: shifted down by 5 units from the bar top and top-justified,
  # so the offset is the same for every bar regardless of its height
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    nudge_y = -5,
    vjust = 1,
    size = 5
  ) +
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  scale_x_discrete(name = NULL, labels = c("female", "male")) +
  # y axis runs from 0 to 95 with a break every 25
  scale_y_continuous(
    name = "number",
    breaks = seq(0, 100, 25),
    limits = c(0, 95)
  ) +
  theme_minimal()
Code
# Scatterplot: white blood cell count against red blood cell count
scatterplot <- athletes_dat %>%
  ggplot(aes(rcc, wcc)) +
  # shape 21 is a filled circle: fill encodes sex, the outline is white
  geom_point(
    aes(fill = sex),
    shape = 21,
    colour = "white",
    size = 3,
    show.legend = FALSE
  ) +
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  xlab("RBC count") +
  ylab("WBC count") +
  theme_minimal()
Code
# Boxplot: percent body fat by sport, split by sex
boxplot <- ggplot(data = athletes_dat, aes(x = sport, y = pcBfat)) +
  # narrower boxes; outline and shading both encode sex
  geom_boxplot(aes(color = sex, fill = sex), width = 0.5) +
  # both scales share the same (empty) title and labels so they are drawn as
  # one legend; "female " carries a trailing space to space out the keys
  scale_fill_manual(
    name = NULL,
    labels = c("female ", "male"),
    values = c("#D55E0040", "#0072B240")
  ) +
  scale_color_manual(
    name = NULL,
    labels = c("female ", "male"),
    values = c("#D55E00", "#0072B2")
  ) +
  # legend keys: side by side, no outline, shaded with the same hex values
  # used for the fills of the bar plot and scatterplot
  guides(
    fill = guide_legend(
      ncol = 2,
      override.aes = list(
        fill = c("#D55E00D0", "#0072B2D0"),
        color = "transparent"
      )
    )
  ) +
  xlab(NULL) +
  ylab("% body fat") +
  theme_minimal() +
  # legend in the top-right corner of the panel
  # NOTE(review): a numeric legend.position is deprecated from ggplot2 3.5.0
  # in favour of legend.position.inside -- switch once that version is required
  theme(
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    legend.margin = margin(t = 0)
  )

Using patchwork

Code
# Top row: bar plot and scatterplot side by side
top_row <- barplot + scatterplot

# Stack the top row over the boxplot; both rows get the same height
top_row / boxplot +
  plot_layout(nrow = 2, heights = c(2, 2))


Using cowplot

Use cowplot::plot_grid() to combine them.

Code
# Top row: bar plot and scatterplot side by side
top_row <- cowplot::plot_grid(barplot, scatterplot, nrow = 1)

# Stack the top row over the boxplot
cowplot::plot_grid(top_row, boxplot, nrow = 2)

Exercise 5

Create the following graphic using patchwork.

Hints:

  • Use plots created in Exercise 4
  • inset theme is classic
    • Useful values: 0, 0.45, 0.75, 1
  • plot annotation "A"
Solution
Code
#| label: ex5-plot

# Scatterplot as the main panel, with the bar plot inset in its
# bottom-right corner (positions are fractions of the main panel)
p1 <- scatterplot +
  inset_element(barplot, left = 0.75, bottom = 0, right = 1, top = 0.45) +
  # tag the plots with capital letters
  plot_annotation(tag_levels = "A") +
  # added after the inset, so the classic theme is applied to the inset plot
  theme_classic()

p1